package cn.com.duiba.millionaire.center.spider.spdierjob;

import cn.com.duiba.millionaire.center.biz.service.SpiderPublicInfoService;
import cn.com.duiba.millionaire.center.spider.common.PageConsumer;
import cn.com.duiba.millionaire.center.spider.entity.PublicInfoEntity;
import com.alibaba.fastjson.JSONObject;
import org.assertj.core.util.Lists;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.selector.Html;

import java.util.List;
import java.util.Map;


/**
 * Spider component that loads a page through a Selenium {@link WebDriver},
 * extracts public-account rows from the rendered HTML and stores each row via
 * {@link SpiderPublicInfoService#insertPublicInfo}.
 *
 * <p>The {@link WebDriver} is not injected; it must be supplied through
 * {@link #setWebDriver(WebDriver)} before {@link #pageConsumer} is invoked.
 *
 * <p>The page counter is a plain static {@code int}; this class performs no
 * synchronization of its own.
 *
 * @author fengyibo
 * @date 2018/8/22 10:53
 */
@Component
public class XiGuaShuJuSpider {
    /** Never assigned or read inside this class. */
    private static Map<Integer, String> map;

    /** Browser driver used to render the page; set through {@link #setWebDriver(WebDriver)}. */
    private WebDriver webDriver;

    /**
     * @return the browser driver currently used by this spider, or {@code null} if none was set
     */
    public WebDriver getWebDriver() {
        return webDriver;
    }

    /**
     * @param webDriver the browser driver used to load pages
     */
    public void setWebDriver(WebDriver webDriver) {
        this.webDriver = webDriver;
    }

    /** Category labels; the label applied to a page is selected by {@code count / PAGES_PER_TYPE}. */
    private static final List<String> types = Lists.newArrayList("情感励志","搞笑趣闻","影音娱乐","旅游","运动","医疗健康","数码科技","汽车","餐饮美食",
            "女人时尚","房产家居","母婴","生活常识","时事资讯","政务","财经","地方","职场教育","早教幼教","小学教育","中学教育","大学校园");

    /** Data ranking page (not referenced inside this class). */
    private static final String PUBLIC_RANK_URL = "http://data.xiguaji.com/Home#/Rank/Industry?tid=";

    /** Number of consecutively consumed pages that share the same entry of {@link #types}. */
    private static final int PAGES_PER_TYPE = 8;

    /** Minimum number of fields a row must have: fans, head reads, second reads, times. */
    private static final int FIELDS_PER_ROW = 4;

    /** Five-space delimiter that separates rows in the body text. */
    private static final String ROW_SEPARATOR = "     ";

    /** Number of pages consumed so far; drives the category lookup. */
    private static int count = 0;

    @Autowired
    private SpiderPublicInfoService spiderPublicInfoService;

    /**
     * Consumes one page: renders its URL with the browser driver, parses the
     * body text into rows and inserts one {@link PublicInfoEntity} per row.
     *
     * <p>Blank row segments are skipped. A non-blank row with fewer than
     * {@value #FIELDS_PER_ROW} fields raises
     * {@link ArrayIndexOutOfBoundsException}; a non-numeric fourth field raises
     * {@link NumberFormatException}. When more pages are consumed than
     * {@code types.size() * PAGES_PER_TYPE}, an
     * {@link IndexOutOfBoundsException} is raised.
     */
    public PageConsumer pageConsumer = page -> {
        // Let the browser driver load the dynamic page, then scrape the rendered DOM.
        String url = page.getUrl().get();
        webDriver.get(url);
        WebElement root = webDriver.findElement(By.xpath("/html"));
        Html html = new Html(root.getAttribute("outerHTML"));

        // get() yields null when the expression matches nothing; treat that as "no rows".
        String detail = html.xpath("//html//body/text()").get();
        if (detail != null && !detail.trim().isEmpty()) {
            // The category is the same for every row of the page, so resolve it once.
            String type = currentType();

            // Name / WeChat id of the n-th row are read from span 2, 4, 6, ...
            int spanIndex = 2;
            for (String rawRow : detail.split(ROW_SEPARATOR)) {
                String row = rawRow.trim();
                if (row.isEmpty()) {
                    // Produced by longer runs of spaces, not by a real row
                    // (assumption — TODO confirm), so the span index is not advanced.
                    continue;
                }

                // Split on whitespace runs so repeated spaces cannot shift the fields.
                String[] fields = row.split("\\s+");
                if (fields.length < FIELDS_PER_ROW) {
                    throw new ArrayIndexOutOfBoundsException("Expected at least " + FIELDS_PER_ROW
                            + " fields but found " + fields.length + " in row '" + row + "' of " + url);
                }

                PublicInfoEntity entity = new PublicInfoEntity();
                entity.setPublicName(html.xpath("//span[" + spanIndex + "]/span/text()").get());
                entity.setWxName(html.xpath("//span[" + spanIndex + "]/em/text()").get());
                entity.setFansNum(fields[0]);
                entity.setHeadRedaNum(fields[1]);
                entity.setSecondReadNum(fields[2]);
                entity.setTims(Long.valueOf(fields[3]));
                entity.setType(type);
                spiderPublicInfoService.insertPublicInfo(entity);
                spanIndex += 2;
            }
        }
        // A page without rows still counts as a consumed page
        // (assumption — TODO confirm against the URL schedule).
        count++;
    };

    /**
     * Resolves the category label for the page currently being consumed.
     *
     * @return the entry of {@link #types} at {@code count / PAGES_PER_TYPE}
     * @throws IndexOutOfBoundsException if more pages were consumed than the
     *         category list covers
     */
    private static String currentType() {
        int typeIndex = count / PAGES_PER_TYPE;
        if (typeIndex >= types.size()) {
            throw new IndexOutOfBoundsException("Page counter " + count + " maps to type index "
                    + typeIndex + ", but only " + types.size() + " types are defined");
        }
        return types.get(typeIndex);
    }
}
